首页 > 代码库 > Lucene 4.9索引txt文件

Lucene 4.9索引txt文件

程序暂时只是能跑起来,还不确定结果是否正确,改天再仔细研究。搜索部分用的是分页写法,也还没有仔细整理……

参考着 http://blog.csdn.net/kingskyleader/article/details/8444739

在data下放了三个txt...

S:\lucene\data\永生.txt

S:\lucene\data\1.txt

S:\lucene\data\2.txt

 

《永生》是一本中文小说,内容里应该没有英文。

1.txt 内容: hello

2.txt 内容: hi hello  哈哈

 

程序运行之后控制台打印的信息:

adding [Ljava.io.File;@3f611531
adding [Ljava.io.File;@3f611531
adding [Ljava.io.File;@3f611531
S:\lucene\data\1.txt
1407857427736
S:\lucene\data\2.txt
1407857444245

具体改天再研究。

下面是代码:

pom:

<!-- Maven POM for the sample project (LuceneTest:lucene, jar packaging, UTF-8 sources).
     Dependencies: JUnit 3.8.1 (test scope) and Lucene 4.9.0 modules
     lucene-core, lucene-queryparser, lucene-analyzers-common and lucene-highlighter. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">  <modelVersion>4.0.0</modelVersion>  <groupId>LuceneTest</groupId>  <artifactId>lucene</artifactId>  <version>0.0.1-SNAPSHOT</version>  <packaging>jar</packaging>  <name>lucene</name>  <url>http://maven.apache.org</url>  <properties>    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>  </properties>  <dependencies>    <dependency>      <groupId>junit</groupId>      <artifactId>junit</artifactId>      <version>3.8.1</version>      <scope>test</scope>    </dependency>        <!-- lucene -->    <dependency>    <groupId>org.apache.lucene</groupId>    <artifactId>lucene-core</artifactId>    <version>4.9.0</version>    </dependency>        <dependency>    <groupId>org.apache.lucene</groupId>    <artifactId>lucene-queryparser</artifactId>    <version>4.9.0</version>    </dependency>        <dependency>    <groupId>org.apache.lucene</groupId>    <artifactId>lucene-analyzers-common</artifactId>    <version>4.9.0</version>    </dependency>        <dependency>    <groupId>org.apache.lucene</groupId>    <artifactId>lucene-highlighter</artifactId>    <version>4.9.0</version>    </dependency>      </dependencies></project>

 

建立索引:

package lucene;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStreamReader;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.LongField;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.index.Term;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class InitIndex {    public void creatIndex() throws IOException {        boolean create = true;        File data = new File("S:\\lucene\\data");        File index = new File("S:\\lucene\\index");        Directory dir = FSDirectory.open(index);        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9,                analyzer);        if (create) {            // Create a new index in the directory, removing any            // previously indexed documents:            iwc.setOpenMode(OpenMode.CREATE);        } else {            // Add new documents to an existing index:            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);        }        IndexWriter iw = new IndexWriter(dir, iwc);        File[] file = data.listFiles();        FileInputStream fis = null;        for (File f : file) {                        fis = new FileInputStream(f);            Document doc = new Document();            Field pathField = new StringField("path", f.getPath(),                    Field.Store.YES);            doc.add(pathField);            doc.add(new LongField("modified", f.lastModified(), Field.Store.YES));            doc.add(new 
TextField("contents", new BufferedReader(                    new InputStreamReader(fis, "GBK"))));            if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {                // New index, so we just add the document (no old document can                // be there):                System.out.println("adding " + file);                iw.addDocument(doc);            } else {                // Existing index (an old copy of this document may have been                // indexed) so                // we use updateDocument instead to replace the old one matching                // the exact                // path, if present:                System.out.println("updating " + file);                iw.updateDocument(new Term("path", f.getPath()), doc);            }        }        iw.close();        fis.close();            }}

搜索:

package lucene;import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class Search {    public void query() throws IOException, ParseException {        String queries = "hello";        int hitsPerPage = 10;                 File index = new File("S:\\lucene\\index");        IndexReader reader = DirectoryReader.open(FSDirectory.open(index));        IndexSearcher searcher = new IndexSearcher(reader);        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);                QueryParser parser = new QueryParser(Version.LUCENE_4_9, "contents", analyzer);          Query query = parser.parse(queries);                TopDocs results = searcher.search(query, 5 * hitsPerPage);        ScoreDoc[] hits = results.scoreDocs;          int numTotalHits = results.totalHits;                           int start = 0;          int end = Math.min(numTotalHits, hitsPerPage);                  for (int i = start; i < end; i++) {                    Document doc = searcher.doc(hits[i].doc);              String path = doc.get("path");             System.out.println(path);           String modified=doc.get("modified");           System.out.println(modified);                                  }                      }}

主函数:

package lucene;

import java.io.IOException;

import org.apache.lucene.queryparser.classic.ParseException;

/**
 * Entry point of the sample: rebuilds the index, then runs the sample search.
 */
public class Main {

    /**
     * Builds the index via {@link InitIndex#creatIndex()} and then queries it
     * via {@link Search#query()}.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if indexing or searching fails on I/O
     * @throws ParseException if the sample query cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        // Index first so that the search below has something to read.
        InitIndex indexer = new InitIndex();
        indexer.creatIndex();

        Search searcher = new Search();
        searcher.query();
    }
}