首页 > 代码库 > Lucene 4.9索引txt文件
Lucene 4.9索引txt文件
目前程序只是能跑起来,结果是否正确还没有验证,改天再完善。搜索部分沿用了示例里的分页写法(每页 10 条),也还没有仔细处理。
参考着 http://blog.csdn.net/kingskyleader/article/details/8444739
在 S:\lucene\data 目录下放了三个 txt 文件:
S:\lucene\data\永生.txt
S:\lucene\data\1.txt
S:\lucene\data\2.txt
永生.txt 是一本中文小说,里面应该没有英文,所以搜索 "hello" 时不应该命中它。
1.txt 内容: hello
2.txt 内容: hi hello 哈哈
程序运行之后控制台打印的信息:
adding [Ljava.io.File;@3f611531
adding [Ljava.io.File;@3f611531
adding [Ljava.io.File;@3f611531
S:\lucene\data\1.txt
1407857427736
S:\lucene\data\2.txt
1407857444245
具体改天再研究。
下面是代码:
pom:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>LuceneTest</groupId>
  <artifactId>lucene</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>lucene</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Single place to set the version shared by all Lucene modules below. -->
    <lucene.version>4.9.0</lucene.version>
  </properties>

  <dependencies>
    <!-- unit testing (test scope only) -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <!-- lucene -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>
  </dependencies>
</project>
建立索引:
package lucene;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStreamReader;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.LongField;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.index.Term;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class InitIndex { public void creatIndex() throws IOException { boolean create = true; File data = new File("S:\\lucene\\data"); File index = new File("S:\\lucene\\index"); Directory dir = FSDirectory.open(index); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } IndexWriter iw = new IndexWriter(dir, iwc); File[] file = data.listFiles(); FileInputStream fis = null; for (File f : file) { fis = new FileInputStream(f); Document doc = new Document(); Field pathField = new StringField("path", f.getPath(), Field.Store.YES); doc.add(pathField); doc.add(new LongField("modified", f.lastModified(), Field.Store.YES)); doc.add(new TextField("contents", new BufferedReader( new InputStreamReader(fis, "GBK")))); if (iw.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can // be there): System.out.println("adding " + file); 
iw.addDocument(doc); } else { // Existing index (an old copy of this document may have been // indexed) so // we use updateDocument instead to replace the old one matching // the exact // path, if present: System.out.println("updating " + file); iw.updateDocument(new Term("path", f.getPath()), doc); } } iw.close(); fis.close(); }}
搜索:
package lucene;import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class Search { public void query() throws IOException, ParseException { String queries = "hello"; int hitsPerPage = 10; File index = new File("S:\\lucene\\index"); IndexReader reader = DirectoryReader.open(FSDirectory.open(index)); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents", analyzer); Query query = parser.parse(queries); TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; int start = 0; int end = Math.min(numTotalHits, hitsPerPage); for (int i = start; i < end; i++) { Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); System.out.println(path); String modified=doc.get("modified"); System.out.println(modified); } }}
主函数:
package lucene;

import java.io.IOException;

import org.apache.lucene.queryparser.classic.ParseException;

/**
 * Program entry point: builds the index first, then runs the sample search
 * against it.
 */
public class Main {

    /**
     * @param args command-line arguments (not used)
     * @throws IOException    propagated from indexing or searching
     * @throws ParseException propagated from query parsing
     */
    public static void main(String[] args) throws IOException, ParseException {
        // Indexing must finish before the search opens the index.
        new InitIndex().creatIndex();
        new Search().query();
    }
}
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。