首页 > 代码库 > 一:Lucene初体验

一:Lucene初体验

package com.cmy.lucene.lucene;

import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each regular file becomes one document with three fields: {@code contents}
 * (tokenized from the file's text, not stored), {@code fileName} and
 * {@code fullPath} (both stored so they can be read back from search hits).
 *
 * <p>Instances hold an open {@link IndexWriter} and must be closed; the class
 * implements {@link AutoCloseable} so it can be used in try-with-resources.
 */
public class Indexer implements AutoCloseable {

    /** Directory handle backing the index; closed together with the writer. */
    private final Directory directory;

    /** Writer through which every document is added to the index. */
    private final IndexWriter writer;

    /**
     * Opens an {@link IndexWriter} over the index located at {@code indexDir},
     * using a {@link StandardAnalyzer} for tokenization.
     *
     * @param indexDir file-system path of the index directory
     * @throws Exception if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        IndexWriter indexWriter;
        try {
            Analyzer analyzer = new StandardAnalyzer();
            indexWriter = new IndexWriter(dir, new IndexWriterConfig(analyzer));
        } catch (Exception e) {
            // Do not leak the directory handle when the writer cannot be created.
            try {
                dir.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.directory = dir;
        this.writer = indexWriter;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws Exception if closing either resource fails
     */
    @Override
    public void close() throws Exception {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular file that is a direct child of {@code dataDir}.
     * Sub-directories and other non-regular entries are skipped (they cannot be
     * read as text and would otherwise abort the whole run).
     *
     * @param dataDir path of the directory whose files should be indexed
     * @return the document count reported by the writer after indexing
     * @throws IllegalArgumentException if {@code dataDir} is not a readable directory
     * @throws Exception if reading a file or writing to the index fails
     */
    public int index(String dataDir) throws Exception {
        // listFiles() returns null (not an empty array) when the path is not a
        // directory or an I/O error occurs.
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            throw new IllegalArgumentException(
                    "Not a readable directory: " + dataDir);
        }
        for (File file : files) {
            if (file.isFile()) {
                indexFile(file);
            }
        }
        return writer.numDocs();
    }

    /**
     * Adds a single file to the index, logging its canonical path first.
     *
     * @param file the file to index
     * @throws Exception if the file cannot be read or the document cannot be added
     */
    private void indexFile(File file) throws Exception {
        System.out.println("索引文件:" + file.getCanonicalPath());
        Document document = getDocument(file);
        writer.addDocument(document);
    }

    /**
     * Builds the Lucene document describing {@code file}.
     *
     * @param file the file to describe
     * @return a document with {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws Exception if the file cannot be opened or its canonical path resolved
     */
    private Document getDocument(File file) throws Exception {
        Document document = new Document();
        // Reader-based TextField: tokenized and indexed, but not stored.
        // NOTE(review): FileReader decodes with the platform default charset on
        // older JDKs — confirm the data files match it.
        document.add(new TextField("contents", new FileReader(file)));
        document.add(new TextField("fileName", file.getName(), Field.Store.YES));
        document.add(new TextField("fullPath", file.getCanonicalPath(), Field.Store.YES));
        return document;
    }

    /**
     * Demo entry point: indexes a hard-coded data directory into a hard-coded
     * index directory and prints how many documents were indexed and how long
     * it took.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "D:\\lucene";
        String dataDir = "E:\\JavaEE\\luceneData";
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        // try-with-resources closes the indexer only if it was actually created,
        // so a failing constructor no longer triggers a second (null-pointer) error.
        try (Indexer indexer = new Indexer(indexDir)) {
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        // Elapsed time is a currentTimeMillis() difference, i.e. milliseconds.
        System.out.println("索引: " + numIndexed + " 个文件,花费了" + (end - start) + " 毫秒");
    }
}

技术分享

package com.cmy.lucene.lucene;

import java.nio.channels.ScatteringByteChannel;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a free-text query against the {@code contents} field of a Lucene index
 * and prints the {@code fullPath} of each matching document.
 */
public class Searcher {

    /**
     * Parses {@code qString} with the classic query parser, searches the index
     * at {@code indexDir} for the top 10 hits, and prints the time the search
     * took followed by the stored {@code fullPath} of every hit.
     *
     * <p>The directory, reader and analyzer are closed on every exit path,
     * including when parsing or searching throws.
     *
     * @param indexDir file-system path of the index directory
     * @param qString  query text in classic query-parser syntax
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void search(String indexDir, String qString) throws Exception {
        try (Directory directory = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(directory);
             Analyzer analyzer = new StandardAnalyzer()) {

            IndexSearcher iSearcher = new IndexSearcher(reader);
            // Query terms are analyzed with the same analyzer type used at index time.
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(qString);

            // Only the search itself is timed, not index opening or query parsing.
            long start = System.currentTimeMillis();
            // Returns at most 10 hits; search(Query, int) ranks hits by relevance score.
            TopDocs hits = iSearcher.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配" + qString + ",总共花费" + (end - start) + " 毫秒");

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                // scoreDoc.doc is the internal document id used to load stored fields.
                Document document = iSearcher.doc(scoreDoc.doc);
                System.out.println("fullPath: " + document.get("fullPath"));
            }
        }
    }

    /**
     * Demo entry point: searches a hard-coded index for a hard-coded query and
     * prints any failure's stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "D:\\lucene";
        String queryText = "Zygmunt Saloni";
        try {
            search(indexDir, queryText);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

技术分享

 

一:Lucene初体验