首页 > 代码库 > lucene应用

lucene应用

package com.lin.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.wltea.analyzer.lucene.IKAnalyzer;

public class LuceneUtil {

	private Log log = LogFactory.getLog(LuceneUtil.class);
	private IndexWriter writer;
	private IndexReader reader;
	private static Tika tika = new Tika();
	/**
	 * 建立索引
	 * @param srcDriectory		 需要建立索引的文件位置
	 * @param indexDirectory 	索引放置位置
	 * @param analyzer 			解析器
	 * @param version			lucene版本
	 * @param openMode			打开方式(1.创建,2追加,3创建或追加)
	 * @throws IOException
	 * @throws TikaException 
	 */
	@SuppressWarnings("deprecation")
	public void diskIndex(File srcDriectory,File indexDirectory, Analyzer analyzer, Version version,OpenMode openMode )
			throws IOException, TikaException {
		if(!indexDirectory.exists()){
			indexDirectory.mkdirs();
		}
		FSDirectory fsd = FSDirectory.open(indexDirectory);
		IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
		config.setOpenMode(openMode);
		writer = new IndexWriter(fsd, config);
		List<File> files = FileUtil.listFile(srcDriectory);
		Document doc = null;
		for (File file : files) {
			doc = new Document();
			doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
			doc.add(new Field("path", file.getAbsolutePath(), Store.YES,
					Index.NO));
			doc.add(new Field("content", tikaParseFileToString(file), Store.YES,
					Index.ANALYZED));
			writer.addDocument(doc);
		}
		writer.commit();
	}



	
	
	/**
	 * 获取查询把柄
	 * @param indexDirectory
	 * @return
	 * @throws IOException
	 */
	public IndexSearcher getIndexSearch(File indexDiretory) throws IOException{
		Directory directory = FSDirectory.open(indexDiretory);
		return new IndexSearcher(reader.open(directory));
	}
	
	public String search(File indexDirectory,String word,Analyzer analyzer) throws IOException, ParseException{
		IndexSearcher indexSearch = getIndexSearch(indexDirectory);
		QueryParser parser = new QueryParser( "content",analyzer);
		Query query = parser.parse(word);
		TopDocs docs = indexSearch.search(query, 10);
		ScoreDoc[] sds = docs.scoreDocs;
		for(ScoreDoc sd:sds){
			Document document = indexSearch.doc(sd.doc);
			System.out.println("name==========="+document.get("name")+"path==========="+document.get("path")
					);
		}
		return null;
	}
	public String tikaParseFileToString(File file) throws IOException, TikaException{
		return tika.parseToString(file);
	}
	public static void main(String[] args)throws Exception {
			//new LuceneUtil().diskIndex(new File("d:\\lucene"), new File("d:\\luceneIndex"), new IKAnalyzer(), Version.LUCENE_4_10_2, OpenMode.CREATE);
			new LuceneUtil().search(new File("d:\\luceneIndex"),"接口",new IKAnalyzer());
		Tika tika = new Tika();
		String str = tika.parseToString(new FileInputStream("d:\\lucene\\IKAnalyzer中文分词器V2012_FF使用手册.pdf"));
		System.out.println(str);
	}
}

项目依赖使用 Maven 管理,pom.xml 如下:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
	<!-- Maven build descriptor for the "learn" web application (packaged as a war). -->
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.lin.project</groupId>
	<artifactId>learn</artifactId>
	<packaging>war</packaging>
	<version>0.0.1-SNAPSHOT</version>
	<name>mybatis Maven Webapp</name>
	<url>http://maven.apache.org</url>
	<!-- Version properties referenced by the Redis dependencies further down. -->
	<properties>
		<redis.clients.version>2.6.0</redis.clients.version>
		<spring.data.redis.version>1.4.0.RELEASE</spring.data.redis.version>
	</properties>
	<dependencies>
		<!-- Unit testing (test scope only). -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>3.8.1</version>
			<scope>test</scope>
		</dependency>
		<!-- Logging: commons-logging API plus log4j. -->
		<dependency>
			<groupId>commons-logging</groupId>
			<artifactId>commons-logging</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>log4j</groupId>
			<artifactId>log4j</artifactId>
			<version>1.2.17</version>
		</dependency>
		<!-- Database connection pooling. -->
		<dependency>
			<groupId>commons-dbcp</groupId>
			<artifactId>commons-dbcp</artifactId>
			<version>1.4</version>
		</dependency>
		<!-- MyBatis and its Spring integration. -->
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.2.7</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis-spring</artifactId>
			<version>1.2.2</version>
		</dependency>
		<!-- Spring Framework modules; all pinned to the same 4.0.6.RELEASE version. -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-core</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-beans</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-tx</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-aop</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-jdbc</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-webmvc</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-web</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-orm</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-test</artifactId>
			<version>4.0.6.RELEASE</version>
		</dependency>
		<!-- AspectJ weaver. -->
		<dependency>
			<groupId>org.aspectj</groupId>
			<artifactId>aspectjweaver</artifactId>
			<version>1.8.2</version>
		</dependency>
		<!-- JSTL and the standard taglib implementation. -->
		<dependency>
			<groupId>jstl</groupId>
			<artifactId>jstl</artifactId>
			<version>1.2</version>
		</dependency>
		<dependency>
			<groupId>taglibs</groupId>
			<artifactId>standard</artifactId>
			<version>1.1.2</version>
		</dependency>

		<!-- MySQL JDBC driver. -->
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.32</version>
		</dependency>
		<!-- Quartz scheduler and its bundled jobs. -->
		<dependency>
			<groupId>org.quartz-scheduler</groupId>
			<artifactId>quartz</artifactId>
			<version>2.2.1</version>
		</dependency>
		<dependency>
			<groupId>org.quartz-scheduler</groupId>
			<artifactId>quartz-jobs</artifactId>
			<version>2.2.1</version>
		</dependency>
		<!-- Jackson 1.x (codehaus) JSON core and mapper. -->
		<dependency>
			<groupId>org.codehaus.jackson</groupId>
			<artifactId>jackson-core-asl</artifactId>
			<version>1.9.13</version>
		</dependency>
		<dependency>
			<groupId>org.codehaus.jackson</groupId>
			<artifactId>jackson-mapper-asl</artifactId>
			<version>1.9.13</version>
		</dependency>
		<!-- File upload support. -->
		<dependency>
			<groupId>commons-fileupload</groupId>
			<artifactId>commons-fileupload</artifactId>
			<version>1.3.1</version>
		</dependency>
		<!-- Redis: Jedis client and Spring Data Redis; versions come from the properties above. -->
		<dependency>
			<groupId>redis.clients</groupId>
			<artifactId>jedis</artifactId>
			<version>${redis.clients.version}</version>
			<type>jar</type>
			<scope>compile</scope>
		</dependency>
		<dependency>
			<groupId>org.springframework.data</groupId>
			<artifactId>spring-data-redis</artifactId>
			<version>${spring.data.redis.version}</version>
		</dependency>
		<!-- Lucene 4.10.2 modules: core, highlighter, query parser and common analyzers. -->
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>4.10.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-highlighter</artifactId>
			<version>4.10.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>4.10.2</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-common</artifactId>
			<version>4.10.2</version>
		</dependency>
		<!-- Tika is not resolved through Maven in this build: the dependency below is disabled. -->
		<!-- <dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-app</artifactId>
			<version>1.6</version>
		</dependency> -->
	</dependencies>
	<build>
		<!-- Name of the generated artifact, without the file extension. -->
		<finalName>learn</finalName>
	</build>
</project>

另外需要手动添加 Tika 和 IKAnalyzer 的 jar 包(这两个依赖未通过 Maven 引入),下载地址如下:

http://pan.baidu.com/s/1o69fCeQ 提取码:122b

http://pan.baidu.com/s/1hq6AalY 提取码:k3xp

lucene应用